#######################################################################
################ specification curve analyses #########################
#######################################################################

rm(list=ls())
# working directory should be folder extr_weather_rep:
getwd()

# Libraries
library(tidyverse)
library(rio)
library(lfe)
library(grDevices)

# for specification curve analysis
source("code/spec_chart_function.R")

# 0. import data ####
resp <- import("data/respondents_final.csv")

# exclude all respondents that are outside of Switzerland
resp <- dplyr::filter(resp, ID != 39)
resp <- dplyr::filter(resp, ID != 1000)
resp <- dplyr::filter(resp, ID != 1206)
resp <- dplyr::filter(resp, ID != 2117)

# variables
n_start <- which(names(resp) == "temp_extreme_w1w4_percent")
n_end <- which(names(resp) == "dis_binary_w1w4_summer")
treat <- names(resp[,c(n_start:n_end)])

# get variation in independent variables
quart1 <- apply(resp[,n_start:n_end], 2, quantile, 0.25)
quart3 <- apply(resp[,n_start:n_end], 2, quantile, 0.75)
rm(n_start); rm(n_end)

#######################################
# One plot per step ####
#######################################

# Set-up ###
graph_height <- 10

operat <- import("data/operationalisations.xlsx")
treat_labels <- operat[,2] %>% as.vector()
# treat_labels <- str_remove(treat,"extreme_") %>% str_remove(.,"_percent") %>% str_remove(.,"_w1w4")
treat_labels_list <- list("Temperature:" = treat_labels[1:20],
                          "Precipitation:" = treat_labels[21:28],
                          "Events:" = treat_labels[29:34])

## Step 2 - no covariates ####

# 0. Bring data into panel format
resp_step2 <- resp[,c("ID", treat, "w4_q15x3_rev", "w1_q16x4_rev")]
resp_step2 <- resp_step2 %>% rename(., c(outcome_w4 = w4_q15x3_rev,
                                         outcome_w1 = w1_q16x4_rev))
# rename all weather variables to _w4
resp_step2 <- resp_step2 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step2$new_variable <- 0
  names(resp_step2)[names(resp_step2)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step2_long <- pivot_longer(resp_step2, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))

resp_step2_long$wave <- str_remove(resp_step2_long$wave, "_w")
resp_step2_long$ID <- as.character(resp_step2_long$ID)

# 1. Baseline regression

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step2_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL

# Calculate whether effect is meaningful:

# independent variable:
# jump between 75% and 25% percentile (this comes from 04_Analysis.R)
data$var <- quart3 - quart1
# effect of this jump on dependent variable
data$effect <- data$coef * data$var 
data$var <- NULL

# dependent variable
# variance: mean level of change, taking time and individual FE into account

# prepare empty matrix
var_step2 <- matrix(0, nrow = nrow(resp), ncol = nrow(data))

# calculate change within respondent, minus time fe, for each respondent and each operationalisation
# for one dependent variable
for (i in 1:nrow(data)){
  var_step2[,i] <-  (resp$w4_q15x3_rev - data$fe_w4[i]) - (resp$w1_q16x4_rev)
}
# set zeroes to NA
var_step2[var_step2==0] <- NA
# we have no clear zeroes due to time fixed effects

# get mean level of change over all respondents: variance
data$variance_step2 <- apply(var_step2, 2, var, na.rm=T)
# --> this is the same for all, as time fixed effects only move everything by same fixed value

# which share of std. deviation (sqrt of variance) in dependent variable is explained
# by jump from 25% to 75% in independent variable?
data$negl <- 0
data$negl <- (abs(data$effect) / sqrt(data$variance_step2)) %>% round(.,2)
# 0 - 4 Percent

#sort by effect
data2 <- data[order(data$coef),] 
neg <- data2$negl %>% round(.,2)
# turn into character, keep 2 digits
neg <- as.character(neg)
neg[neg=="0"] <- "0.00"

rm(data2)
data$negl <- NULL
data$effect <- NULL
data$fe_w4 <- NULL
data$variance_step2 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step2_ordered_90.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step2\n(5-point scale)",        
       heights=c(.4,1),
       leftmargin = 13)

for (i in 1:length(neg)) {
  text(x=i-.1, y=-.24, neg[i], col="black", font=2, cex = 0.7)
}
text(x=mean(1:nrow(data)), y=-.215, "Effect Magnitude", col="black", font=2, cex = 0.7)
dev.off()

## Step 3 - no covariates ####

# 0. Bring data into panel format
resp_step3 <- resp[,c("ID", treat, "step3_index_w4", "step3_index_w1")]
resp_step3 <- resp_step3 %>% rename(., c(outcome_w4 = step3_index_w4,
                                         outcome_w1 = step3_index_w1))

# rename all weather variables to _w4
resp_step3 <- resp_step3 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step3$new_variable <- 0
  names(resp_step3)[names(resp_step3)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step3_long <- pivot_longer(resp_step3, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step3_long$wave <- str_remove(resp_step3_long$wave, "_w")
resp_step3_long$ID <- as.character(resp_step3_long$ID)

# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step3_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL

# Calculate whether effect is meaningful:

# independent variable:
# jump between 75% and 25% percentile (this comes from 04_Analysis.R)
data$var <- quart3 - quart1
# effect of this jump on dependent variable
data$effect <- data$coef * data$var 
data$var <- NULL

# dependent variable
# variance: mean level of change, taking time and individual FE into account

# prepare empty matrix
var_step3 <- matrix(0, nrow = nrow(resp), ncol = nrow(data))

# calculate change within respondent, minus time fe, for each respondent and each operationalisation
# for one dependent variable
for (i in 1:nrow(data)){
  var_step3[,i] <-  (resp$step3_index_w4 - data$fe_w4[i]) - (resp$step3_index_w1)
}
# set zeroes to NA
var_step3[var_step3==0] <- NA
# we have no clear zeroes due to time fixed effects

# get mean level of change over all respondents: variance
data$variance_step3 <- apply(var_step3, 2, var, na.rm=T)
# --> this is the same for all, as time fixed effects only move everything by same fixed value

# which share of std. deviation (sqrt of variance) in dependent variable is explained
# by jump from 25% to 75% in independent variable?
data$negl <- 0
data$negl <- (abs(data$effect) / sqrt(data$variance_step3)) %>% round(.,2)
# 0 - 5 Percent

#sort by effect
data2 <- data[order(data$coef),] 
neg <- data2$negl %>% round(.,2)
# turn into character, keep 2 digits
neg <- as.character(neg)
neg[neg=="0"] <- "0.00"

rm(data2)
data$negl <- NULL
data$effect <- NULL
data$fe_w4 <- NULL
data$variance_step3 <- NULL


# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step3_ordered_90.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on step3\n(5-point scale)",       
       heights=c(.4,1),
       leftmargin = 13)
for (i in 1:length(neg)) {
  text(x=i-.1, y=-.08, neg[i], col="black", font=2, cex = 0.7)
}
text(x=mean(1:nrow(data)), y=-.07, "Magnitude", col="black", font=2, cex = 0.7)

dev.off()

## Step 4 MIP - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("ID", treat, "w4_q5x9", "w1_q5x9")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = w4_q5x9,
                                         outcome_w1 = w1_q5x9))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$ID <- as.character(resp_step4_long$ID)

# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL

# Calculate whether effect is meaningful:

# independent variable:
# jump between 75% and 25% percentile (this comes from 04_Analysis.R)
data$var <- quart3 - quart1
# effect of this jump on dependent variable
data$effect <- data$coef * data$var 
data$var <- NULL

# dependent variable
# variance: mean level of change, taking time and individual FE into account

# prepare empty matrix
var_step4 <- matrix(0, nrow = nrow(resp), ncol = nrow(data))

# calculate change within respondent, minus time fe, for each respondent and each operationalisation
# for one dependent variable
for (i in 1:nrow(data)){
  var_step4[,i] <-  (resp$w4_q5x9 - data$fe_w4[i]) - (resp$w1_q5x9)
}
# set zeroes to NA
var_step4[var_step4==0] <- NA
# we have no clear zeroes due to time fixed effects

# get mean level of change over all respondents: variance
data$variance_step4 <- apply(var_step4, 2, var, na.rm=T)
# --> this is the same for all, as time fixed effects only move everything by same fixed value

# which share of std. deviation (sqrt of variance) in dependent variable is explained
# by jump from 25% to 75% in independent variable?
data$negl <- 0
data$negl <- (abs(data$effect) / sqrt(data$variance_step4)) %>% round(.,2)
# 0 - 4 Percent

#sort by effect
data2 <- data[order(data$coef),] 
neg <- data2$negl %>% round(.,2)
# turn into character, keep 2 digits
neg <- as.character(neg)
neg[neg=="0"] <- "0.00"

rm(data2)
data$negl <- NULL
data$effect <- NULL
data$fe_w4 <- NULL
data$variance_step4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_mip_ordered_90.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.4,1),
       leftmargin = 13)
for (i in 1:length(neg)) {
  text(x=i-.1, y=-.08, neg[i], col="black", font=2, cex = 0.7)
}
text(x=mean(1:nrow(data)), y=-.07, "Magnitude", col="black", font=2, cex = 0.7)

dev.off()

## Step 4 cand_loc - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("ID", treat, "w4_q29", "w1_q25")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = w4_q29,
                                         outcome_w1 = w1_q25))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$ID <- as.character(resp_step4_long$ID)

# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL

# Calculate whether effect is meaningful:

# independent variable:
# jump between 75% and 25% percentile (this comes from 04_Analysis.R)
data$var <- quart3 - quart1
# effect of this jump on dependent variable
data$effect <- data$coef * data$var 
data$var <- NULL

# dependent variable
# variance: mean level of change, taking time and individual FE into account

# prepare empty matrix
var_step4 <- matrix(0, nrow = nrow(resp), ncol = nrow(data))

# calculate change within respondent, minus time fe, for each respondent and each operationalisation
# for one dependent variable
for (i in 1:nrow(data)){
  var_step4[,i] <-  (resp$w4_q29 - data$fe_w4[i]) - (resp$w1_q25)
}
# set zeroes to NA
var_step4[var_step4==0] <- NA
# we have no clear zeroes due to time fixed effects

# get mean level of change over all respondents: variance
data$variance_step4 <- apply(var_step4, 2, var, na.rm=T)
# --> this is the same for all, as time fixed effects only move everything by same fixed value

# which share of std. deviation (sqrt of variance) in dependent variable is explained
# by jump from 25% to 75% in independent variable?
data$negl <- 0
data$negl <- (abs(data$effect) / sqrt(data$variance_step4)) %>% round(.,2)
# 0 - 5 Percent

#sort by effect
data2 <- data[order(data$coef),] 
neg <- data2$negl %>% round(.,2)
# turn into character, keep 2 digits
neg <- as.character(neg)
neg[neg=="0"] <- "0.00"

rm(data2)
data$negl <- NULL
data$effect <- NULL
data$fe_w4 <- NULL
data$variance_step4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_cand_loc_ordered_90.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.4,1),
       leftmargin = 13)
for (i in 1:length(neg)) {
  text(x=i-.1, y=-.2, neg[i], col="black", font=2, cex = 0.7)
}
text(x=mean(1:nrow(data)), y=-.18, "Magnitude", col="black", font=2, cex = 0.7)

dev.off()

## Step 4 cand_nat - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("ID", treat, "w4_q30", "w1_q26")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = w4_q30,
                                         outcome_w1 = w1_q26))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$ID <- as.character(resp_step4_long$ID)


# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL

# Calculate whether effect is meaningful:

# independent variable:
# jump between 75% and 25% percentile (this comes from 04_Analysis.R)
data$var <- quart3 - quart1
# effect of this jump on dependent variable
data$effect <- data$coef * data$var 
data$var <- NULL

# dependent variable
# variance: mean level of change, taking time and individual FE into account

# prepare empty matrix
var_step4 <- matrix(0, nrow = nrow(resp), ncol = nrow(data))

# calculate change within respondent, minus time fe, for each respondent and each operationalisation
# for one dependent variable
for (i in 1:nrow(data)){
  var_step4[,i] <-  (resp$w4_q30 - data$fe_w4[i]) - (resp$w1_q26)
}
# set zeroes to NA
var_step4[var_step4==0] <- NA
# we have no clear zeroes due to time fixed effects

# get mean level of change over all respondents: variance
data$variance_step4 <- apply(var_step4, 2, var, na.rm=T)
# --> this is the same for all, as time fixed effects only move everything by same fixed value

# which share of std. deviation (sqrt of variance) in dependent variable is explained
# by jump from 25% to 75% in independent variable?
data$negl <- 0
data$negl <- (abs(data$effect) / sqrt(data$variance_step4)) %>% round(.,2)
# 0 - 4 Percent

#sort by effect
data2 <- data[order(data$coef),] 
neg <- data2$negl %>% round(.,2)
# turn into character, keep 2 digits
neg <- as.character(neg)
neg[neg=="0"] <- "0.00"

rm(data2)
data$negl <- NULL
data$effect <- NULL
data$fe_w4 <- NULL
data$variance_step4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_cand_nat_ordered_90.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       col.est=c("grey80","royalblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.4,1),
       leftmargin = 13)
for (i in 1:length(neg)) {
  text(x=i-.1, y=-.17, neg[i], col="black", font=2, cex = 0.7)
}
text(x=mean(1:nrow(data)), y=-.155, "Magnitude", col="black", font=2, cex = 0.7)

dev.off()

## Step 4 policy_pref - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("ID", treat, "policy_pref_w4", "policy_pref_w1")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = policy_pref_w4,
                                         outcome_w1 = policy_pref_w1))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$ID <- as.character(resp_step4_long$ID)


# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL

# Calculate whether effect is meaningful:

# independent variable:
# jump between 75% and 25% percentile (this comes from 04_Analysis.R)
data$var <- quart3 - quart1
# effect of this jump on dependent variable
data$effect <- data$coef * data$var 
data$var <- NULL

# dependent variable
# variance: mean level of change, taking time and individual FE into account

# prepare empty matrix
var_step4 <- matrix(0, nrow = nrow(resp), ncol = nrow(data))

# calculate change within respondent, minus time fe, for each respondent and each operationalisation
# for one dependent variable
for (i in 1:nrow(data)){
  var_step4[,i] <-  (resp$policy_pref_w4 - data$fe_w4[i]) - (resp$policy_pref_w1)
}
# set zeroes to NA
var_step4[var_step4==0] <- NA
# we have no clear zeroes due to time fixed effects

# get mean level of change over all respondents: variance
data$variance_step4 <- apply(var_step4, 2, var, na.rm=T)
# --> this is the same for all, as time fixed effects only move everything by same fixed value

# which share of std. deviation (sqrt of variance) in dependent variable is explained
# by jump from 25% to 75% in independent variable?
data$negl <- 0
data$negl <- (abs(data$effect) / sqrt(data$variance_step4)) %>% round(.,2)
# 0 - 4 Percent

#sort by effect
data2 <- data[order(data$coef),] 
neg <- data2$negl %>% round(.,2)
# turn into character, keep 2 digits
neg <- as.character(neg)
neg[neg=="0"] <- "0.00"

rm(data2)
data$negl <- NULL
data$effect <- NULL
data$fe_w4 <- NULL
data$variance_step4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins
pdf("plots/spec_curves/step4_policy_pref_ordered_90.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.4,1),
       leftmargin = 13)
for (i in 1:length(neg)) {
  text(x=i-.1, y=-.06, neg[i], col="black", font=2, cex = 0.7)
}
text(x=mean(1:nrow(data)), y=-.05, "Magnitude", col="black", font=2, cex = 0.7)
text(x=28, y=+.07, "IPCC", col="black", font=2, cex = 1)
arrows(28.4, 0.06,  28.9, 0.04, col = "black", font = 2, length=0.1, lwd= 1.8)
text(x=32, y=+.1, "Meteo\nSwiss", col="black", font=2, cex = 1)
arrows(32.3, 0.083,  32.8, 0.063, col = "black", font = 2, length=0.1, lwd= 1.8)
dev.off()

## Step 4 pid_green - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("ID", treat, "pid_green_w4", "pid_green_w1")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = pid_green_w4,
                                         outcome_w1 = pid_green_w1))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$ID <- as.character(resp_step4_long$ID)


# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL

# Calculate whether effect is meaningful:

# independent variable:
# jump between 75% and 25% percentile (this comes from 04_Analysis.R)
data$var <- quart3 - quart1
# effect of this jump on dependent variable
data$effect <- data$coef * data$var 
data$var <- NULL

# dependent variable
# variance: mean level of change, taking time and individual FE into account

# prepare empty matrix
var_step4 <- matrix(0, nrow = nrow(resp), ncol = nrow(data))

# calculate change within respondent, minus time fe, for each respondent and each operationalisation
# for one dependent variable
for (i in 1:nrow(data)){
  var_step4[,i] <-  (resp$pid_green_w4 - data$fe_w4[i]) - (resp$pid_green_w1)
}
# set zeroes to NA
var_step4[var_step4==0] <- NA
# we have no clear zeroes due to time fixed effects

# get mean level of change over all respondents: variance
data$variance_step4 <- apply(var_step4, 2, var, na.rm=T)
# --> this is the same for all, as time fixed effects only move everything by same fixed value

# which share of std. deviation (sqrt of variance) in dependent variable is explained
# by jump from 25% to 75% in independent variable?
data$negl <- 0
data$negl <- (abs(data$effect) / sqrt(data$variance_step4)) %>% round(.,2)
# 0 - 4 Percent

#sort by effect
data2 <- data[order(data$coef),] 
neg <- data2$negl %>% round(.,2)
# turn into character, keep 2 digits
neg <- as.character(neg)
neg[neg=="0"] <- "0.00"

rm(data2)
data$negl <- NULL
data$effect <- NULL
data$fe_w4 <- NULL
data$variance_step4 <- NULL


# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_pid_green_ordered_90.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.4,1),
       leftmargin = 13)
for (i in 1:length(neg)) {
  text(x=i-.1, y=-.07, neg[i], col="black", font=2, cex = 0.7)
}
text(x=mean(1:nrow(data)), y=-.06, "Magnitude", col="black", font=2, cex = 0.7)

dev.off()

## Step 1 perception (cross-sectional) - no covariates ####

# Set-up ###
treat_labels <- str_remove(treat,"extreme_") %>% str_remove(.,"_percent") %>% str_remove(.,"_w1w4")
graph_height <- 10

# get respondents who had treatment group "In Zukunft werden wir uns auf viele Hitzesommer wie in den Jahren 2018
# und 2019 in der Schweiz einstellen müssen" 

# subset by treatment variables
resp_step1 <- resp[resp$w4_treat2x2==2,]

# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("w4_q15x13 ~ ", paste(x)))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- lm(as.formula(f), resp_step1)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared)
})

regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("w4_q15x13", str_c(treat,"$"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("w4_q15x13", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$w4_q15x13 <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step1_ordered_90.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step1\n(5-point scale)",
       heights=c(.4,1),
       leftmargin = 13)
dev.off()

#######################################
# One plot per step - strictness #####
#######################################

## Step 4 policy_pref - strictness ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("ID", treat, "policy_pref_w4", "policy_pref_w1")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = policy_pref_w4,
                                         outcome_w1 = policy_pref_w1))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !ID,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$ID <- as.character(resp_step4_long$ID)


# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(ID) | 0 | ID")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_extreme_|dis_", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_extreme_|dis_", names(coef(reg)))]], 
             r2=summary(reg)$r.squared)
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL

# add a variable that shows the MEAN of this weather variable
data$mean <- NA
treat_where <-  which(names(data) %in% treat)
data$mean <- lapply(resp_step4_long[,(treat_where-1)], mean, na.rm = T) %>% as.numeric()
rm(treat_where)

# get original treatment
data$treat <- treat
treat_labels <- str_remove(data$treat,"extreme_") %>% str_remove(.,"_percent") %>% str_remove(.,"_w1w4")

# sort by mean!
data <- data[order(data$mean),] 

sig <- data$sig
data$sig <- NULL

# Set-up ###
# now we don't need the variable name anymore
data$treat <- NULL
data$mean <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels_step4 <-  treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_policy_pref_strictness.pdf", 
    width= 14, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels_step4, 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.4,1),
       leftmargin = 13)
text(x=25.5, y=+.06, "IPCC", col="black", font=2, cex = 1)
arrows(26, 0.05,  26.6, 0.027, col = "black", font = 2, length=0.1, lwd= 1.8)
text(x=4, y=+.1, "Meteo\nSwiss", col="black", font=2, cex = 1)
arrows(3, 0.083,  2.4, 0.063, col = "black", font = 2, length=0.1, lwd= 1.8)
text(x=1, y=-.06, "highest\n   strictness", col="black", font=1, cex = 0.7)
text(x=34, y=-.06, "     lowest\nstrictness", col="black", font=1, cex = 0.7)
dev.off()
